In [ ]:
from sklearn import datasets
import numpy as np
In [ ]:
%pylab inline
In [ ]:
digits = datasets.load_digits()
In [ ]:
print digits.DESCR
In [ ]:
print 'target:', digits.target[0]
print 'features: \n', digits.data[0]
print 'number of features:', len(digits.data[0])
In [ ]:
#не будет работать: Invalid dimensions for image data
pylab.imshow(digits.data[0])
In [ ]:
digits.data[0].shape
In [ ]:
print digits.data[0].reshape(8,8)
In [ ]:
digits.data[0].reshape(8,8).shape
In [ ]:
pylab.imshow(digits.data[0].reshape(8,8))
In [ ]:
print digits.keys()
In [ ]:
print digits.images[0]
In [ ]:
pylab.imshow(digits.images[0])
In [ ]:
pyplot.figure(figsize(8, 8))
pyplot.subplot(2, 2, 1)
pylab.imshow(digits.images[0])
pyplot.subplot(2, 2, 2)
pylab.imshow(digits.images[0], cmap = 'hot')
pyplot.subplot(2, 2, 3)
pylab.imshow(digits.images[0], cmap = 'gray')
pyplot.subplot(2, 2, 4)
pylab.imshow(digits.images[0], cmap = 'gray', interpolation = 'nearest')
In [ ]:
pyplot.figure(figsize(20, 8))
for plot_number, plot in enumerate(digits.images[:10]):
pyplot.subplot(2, 5, plot_number + 1)
pylab.imshow(plot, cmap = 'gray')
pylab.title('digit: ' + str(digits.target[plot_number]))
In [ ]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from collections import Counter
In [ ]:
data = digits.data[:1000]
labels = digits.target[:1000]
In [ ]:
print Counter(labels)
In [ ]:
pylab.figure(figsize = (10, 6))
pylab.bar(Counter(labels).keys(), Counter(labels).values())
In [ ]:
classifier = KNeighborsClassifier()
In [ ]:
classifier.fit(data, labels)
In [ ]:
print classification_report(classifier.predict(data), labels)
In [ ]:
from sklearn import random_projection
In [ ]:
projection = random_projection.SparseRandomProjection(n_components = 2, random_state = 0)
data_2d_rp = projection.fit_transform(data)
In [ ]:
pylab.figure(figsize=(10, 6))
pylab.scatter(data_2d_rp[:, 0], data_2d_rp[:, 1], c = labels)
In [ ]:
classifier.fit(data_2d_rp, labels)
print classification_report(classifier.predict(data_2d_rp), labels)
In [ ]:
from sklearn.decomposition import RandomizedPCA
In [ ]:
pca = RandomizedPCA(n_components = 2, random_state = 0)
data_2d_pca = pca.fit_transform(data)
In [ ]:
pylab.figure(figsize = (10, 6))
pylab.scatter(data_2d_pca[:, 0], data_2d_pca[:, 1], c = labels)
In [ ]:
classifier.fit(data_2d_pca, labels)
print classification_report(classifier.predict(data_2d_pca), labels)
In [ ]:
from sklearn import manifold
In [ ]:
mds = manifold.MDS(n_components = 2, n_init = 1, max_iter = 100)
data_2d_mds = mds.fit_transform(data)
In [ ]:
pylab.figure(figsize=(10, 6))
pylab.scatter(data_2d_mds[:, 0], data_2d_mds[:, 1], c = labels)
In [ ]:
classifier.fit(data_2d_mds, labels)
print classification_report(classifier.predict(data_2d_mds), labels)
In [ ]:
tsne = manifold.TSNE(n_components = 2, init = 'pca', random_state = 0)
data_2d_tsne = tsne.fit_transform(data)
In [ ]:
pylab.figure(figsize = (10, 6))
pylab.scatter(data_2d_tsne[:, 0], data_2d_tsne[:, 1], c = labels)
In [ ]:
classifier.fit(data_2d_tsne, labels)
print classification_report(classifier.predict(data_2d_tsne), labels)